clear all

* Move to the input folder and load the raw database.
* Forward slashes are used because Stata accepts them on every platform,
* whereas a backslash separator only works on Windows.

cd ../Inputs
use ConsensusData, clear

* Reshape to have the date of the report as a variable (instead of as part of 
* the name of different variables). After this step d holds the value and
* Date holds the suffix that followed d in the original variable name.

reshape long d, i(Country Variable Year Type) j(Date) string

* Drop report/target-year combinations that have no value

drop if d==.

* Split the report date into day (characters 1-2), month abbreviation
* (characters 3-5) and year (characters 6-9).
* Only jan, apr, jul and oct are mapped to a month number; any other
* abbreviation leaves monthR at 0.

gen dayR=substr(Date,1,2)
destring dayR, replace
gen monthletter=substr(Date,3,3)
gen monthR=0
replace monthR=1  if monthletter=="jan"
replace monthR=4  if monthletter=="apr"
replace monthR=7  if monthletter=="jul"
replace monthR=10 if monthletter=="oct"
gen yearR=substr(Date,6,4)
destring yearR, replace

* Counter for the report number: one value per distinct (year, month, day),
* increasing in chronological order

egen tt=group(yearR monthR dayR)

* Drop years that are too far in the future and have no data anyway

drop if Year>2027

* Recall that all variables are in comparable units across countries except
* for the Current account which is in billions of local currency units

* Reorder the dataset

order Country Variable Type Year tt d 
sort Country Variable Type Year tt d

* Generate identifier variable and time variable in order to generate revisions.
* x identifies a Country-Variable-Type-target year series and t counts its
* reports. The within-group order (tt d) is stated explicitly so that t, and
* therefore the difference below, never depends on how ties were left by an
* earlier sort.

egen x=group(Country Variable Type Year)
bys Country Variable Type Year (tt d): gen t=_n
xtset x t

* Revision = change in the value for the same target year between two
* consecutive reports of the series

gen Rev=D.d

* The panel identifiers were only needed to take the difference, so remove
* them. Then spread the target years across columns: for every target year
* there is one dYYYY and one RevYYYY column, and one row per
* Country-Variable-Type-report.

drop t x
reshape wide Rev d, i(Country Variable Type tt) j(Year)

* Alphabetical ordering puts all Rev columns together and all d columns
* together; the identifiers and report-date variables then go to the front.

order _all, alphabetic
order Country Variable Type tt Date dayR monthletter monthR yearR, first

* Re-index the revisions, which are stored by calendar target year, by how many
* years ahead of the report they refer to. Ten horizon variables are created
* but only horizons 0 to 6 are filled.

capture drop revv*
forvalues k = 0/9 {
	generate revv`k' = .
}

* For a report dated in year yr, horizon k points at target year yr+k when the
* report month is jan or apr, and at yr+k+1 when it is jul or oct.

quietly forvalues yr = 1990/2017 {
	forvalues k = 0/6 {
		local target = `yr' + `k'
		replace revv`k' = Rev`target' if yearR == `yr' & inlist(monthR, 1, 4)
		local ++target
		replace revv`k' = Rev`target' if yearR == `yr' & inlist(monthR, 7, 10)
	}
}

* Condense the horizons into four measures, each divided by 100:
* REVI0 is horizon 0, REVI1 is the sum of horizons 1 to 4, REVI2 is horizon 5
* and REVI3 is horizon 6. REVI1 is missing whenever any of its four terms is.

local src0 "revv0"
local src1 "(revv1+revv2+revv3+revv4)"
local src2 "revv5"
local src3 "revv6"
forvalues h = 0/3 {
	generate REVI`h' = `src`h''/100
}

* Shorten the revision type to its first letter (C, H, L, S) so that it can be
* used as a suffix in variable names. Any other Type value is left untouched.

replace Type = substr(Type, 1, 1) if inlist(Type, "Consensus", "High", "Low", "Standard Deviation")

* Drop aux variables: the horizon helpers and the per-target-year columns

drop revv* Rev* d19* d20*

* Variables that describe the report itself and stay as identifiers in both
* reshapes below

local repvars tt Date dayR monthletter monthR yearR

* First reshape: the type of revision becomes part of the variable name

reshape wide REVI0 REVI1 REVI2 REVI3, i(Country Variable `repvars') j(Type) string

* Second reshape: the variable being studied is appended to the name as well.
* The stub list is every REVI measure combined with every type letter, in the
* order REVI0C REVI0H REVI0L REVI0S REVI1C and so on.

local stubs
forvalues h = 0/3 {
	foreach ty in C H L S {
		local stubs `stubs' REVI`h'`ty'
	}
}

reshape wide `stubs', i(Country `repvars') j(Variable) string
	
* Semester indicator: 1 for jan and apr reports, 2 for jul and oct reports,
* 0 for any report whose month was not recognised

generate semester = cond(inlist(monthR, 1, 4), 1, cond(inlist(monthR, 7, 10), 2, 0))

* Use the identifier names of the other datasets and drop the report-date
* details that are no longer needed

rename (Country yearR) (ISO year)
drop Date dayR monthletter tt

* Collapse the dataset to the semester level. A plain (sum) collapse returns 0
* for a group in which every value is missing, although the revision should
* stay missing. Therefore a count of non-missing values is collapsed next to
* every sum and used afterwards to set those sums back to missing.

ds ISO year semester, not
local sumvars `r(varlist)'

* Build the collapse call: one sum and one numbered non-missing counter per
* variable. The call is reset first so that rerunning this section in the same
* session cannot append to an old list.

local call
local i = 1 
foreach v of local sumvars { 
	local call `call' (sum) `v' (count) nonmiss`i' = `v' 
	local ++i
}

collapse `call', by(ISO year semester)

* Walk the very same list again, so that every counter is paired with the
* variable it was built from regardless of the variable order after collapse

local i = 1 
quietly foreach v of local sumvars { 
	replace `v' = . if nonmiss`i' == 0 
	local ++i
}

drop nonmiss* monthR

* Go to output folder and save the database
* (forward slash so that the path works on every platform)

cd ../Outputs
save CONREV, replace

* Go back to the input folder and load the raw database again
* (forward slash so that the path works on every platform)

cd ../Inputs
use ConsensusData, clear

reshape long d, i(Country Variable Year Type) j(Date) string

* Drop report/target-year combinations that have no value

drop if d==.

* Generate the month and year of the consensus economics report. The day of
* the report is not used in this part, so it is not extracted.

gen monthletter=substr(Date,3,3)
gen monthR=0
replace monthR=1  if monthletter=="jan"
replace monthR=4  if monthletter=="apr"
replace monthR=7  if monthletter=="jul"
replace monthR=10 if monthletter=="oct"
gen yearR=substr(Date,6,4)
destring yearR, replace

* Drop jan and jul reports that just started in the last couple of years

drop if monthR==1 | monthR==7

* Change months to semester: apr is semester 1 and oct is semester 2. Reports
* with an unrecognised month keep semesterR equal to 0.

gen semesterR=0
replace semesterR=1 if monthR==4
replace semesterR=2 if monthR==10
drop monthR monthletter Date

* Shorten the estimate type to its first letter (C, H, L, S) and move it into
* the variable name, giving dC, dH, dL and dS. All four types are kept.

replace Type = substr(Type, 1, 1) if inlist(Type, "Consensus", "High", "Low", "Standard Deviation")
reshape wide d, i(Country Variable Year yearR semesterR) j(Type) string

* Keep the six series GDP, CON, INV, PRO, CPI and BON, and recode each one to
* the name it carries in the output (GYlcu, GClcu, GIlcu, GJlcu, GDlcu, GBlcu)

keep if inlist(Variable, "GDP", "CON", "INV", "PRO", "CPI", "BON")

local oldcodes GDP CON INV PRO CPI BON
local newcodes GYlcu GClcu GIlcu GJlcu GDlcu GBlcu
forvalues k = 1/6 {
	local old : word `k' of `oldcodes'
	local new : word `k' of `newcodes'
	replace Variable = "`new'" if Variable == "`old'"
}

* Reshape to have each series as its own set of variables, then strip the
* leading d so that names read as type letter followed by series name

reshape wide dC dH dL dS, i(Country Year yearR semesterR) j(Variable) string
rename d* *

* Reshape to also have the calendar target year in the variable name, for the
* format to be consistent with the one in OECD and IMF. Every variable other
* than the identifiers is used as a stub.

ds Country Year yearR semesterR, not
local series `r(varlist)'
reshape wide `series', i(Country yearR semesterR) j(Year)

rename (yearR semesterR Country) (year semester ISO)

* Set Panel
* tt counts the reports of each country in (year, semester) order. The order
* is stated explicitly instead of relying on the sort left by the reshape.
* Because tt is an observation counter, the lag and lead operators below move
* to the previous or next report row of the country.

bysort ISO (year semester): gen tt=_n
egen Country=group(ISO)
xtset Country tt

* Assign the appropriate growth rates of output to each time period
*
* Loops: ty is the estimate type, yp the series letter, r the horizon in years.
* For a report in year i the target year is i+r in semester 1 and i+r+1 in
* semester 2.
*   ...tp(r)gtm(j)  value for that target year taken from the report j rows back
*   ...tp(r)gtp(j)  value for that target year taken from the report j rows ahead
*   ...tp(r)gT      value for target year i+r+1 as stated in the report of
*                   year 2016 semester 2; filled for semester 2 rows only

quiet foreach ty in C H L S {
	foreach yp in Y C I J D B {
		forvalues r=0(1)9 {
			forvalues j=0(1)1 {
				gen G`yp'CO`ty'tp`r'gtm`j'=.
				forvalues i=1989(1)2016 {
					replace G`yp'CO`ty'tp`r'gtm`j'=l`j'.`ty'G`yp'lcu`=`i'+`r'' if year==`i' & semester==1
					replace G`yp'CO`ty'tp`r'gtm`j'=l`j'.`ty'G`yp'lcu`=`i'+`r'+1' if year==`i' & semester==2
				}
			}
			forvalues j=1(1)3 {
				gen G`yp'CO`ty'tp`r'gtp`j'=.
				forvalues i=1989(1)2016 {
					replace G`yp'CO`ty'tp`r'gtp`j'=f`j'.`ty'G`yp'lcu`=`i'+`r'' if year==`i' & semester==1
					replace G`yp'CO`ty'tp`r'gtp`j'=f`j'.`ty'G`yp'lcu`=`i'+`r'+1' if year==`i' & semester==2
				}
			}
		}
		* Value of every target year in the 2016 semester 2 report, copied to
		* all rows of the country. The by-country mean does not depend on the
		* horizon or on the report year, so it is computed once per target
		* year here rather than inside the horizon loop.
		forvalues i=1989(1)2026 {
			gen G`yp'CO`ty'last`i'=.
			replace G`yp'CO`ty'last`i'=`ty'G`yp'lcu`i' if year==2016 & semester==2
			by Country: egen G`yp'CO`ty'lastc`i'=mean(G`yp'CO`ty'last`i')
		}
		forvalues r=0(1)9 {
			gen G`yp'CO`ty'tp`r'gT=.
			forvalues i=1989(1)2016 {
				replace G`yp'CO`ty'tp`r'gT=G`yp'CO`ty'lastc`=`i'+`r'+1' if year==`i' & semester==2
			}
		}
		* The wildcard removes both the last and the lastc helpers
		drop G?CO`ty'last*
	}
}

keep ISO year semester G?CO?tp?g*

* Change to output folder and save database
* (forward slash so that the path works on every platform)

cd ../Outputs
save CONACT, replace
